from pyspark.context import SparkContext
import json

# Start a local SparkContext for this demo script.
# NOTE(review): appName "word_count" doesn't match what the script does
# (a union/distinct demo) — consider renaming; left unchanged here since
# the app name is runtime-visible configuration.
sc = SparkContext(master="local", appName="word_count")

try:
    rdd1 = sc.parallelize([1, 2, 3, 4, 5, 6])
    rdd2 = sc.parallelize([2, 4, 5, 6, 7, 89, 1])

    # union() concatenates the two RDDs, keeping duplicates; the resulting
    # partition count is the sum of the inputs' partition counts.
    union_rdd = rdd1.union(rdd2)

    print(f'union_rdd:{union_rdd.getNumPartitions()}')

    # foreach(print) executes on the executors, so element order is
    # nondeterministic (works for local-mode demos like this one).
    union_rdd.foreach(print)

    # distinct() drops duplicate elements (triggers a shuffle).
    distinct_rdd = union_rdd.distinct()

    distinct_rdd.foreach(print)
finally:
    # Fix: the original script never released the SparkContext. Always
    # stop() it so driver/executor resources are freed even on error.
    sc.stop()
